Section 4 AB-effects (metagenomes)

4.1 Load in variants

vcfToDataframe <- function(vcf.files, contig_mapping = contig_mapping, 
    gff.df = gff.df) {
    require(vcfR)
    res <- list()
    for (file in vcf.files) {
        library(data.table)
        vcf.content <- vcfR::read.vcfR(file, verbose = FALSE)
        vcf.fix <- as.data.frame(vcf.content@fix)  # contains chr, position and substitution informations
        vcf.info <- vcfR::INFO2df(vcf.content)  # get INFO field, contains DP, AF informations
        if (nrow(vcf.fix) > 0) {
            # there are variants
            dat <- as.data.frame(cbind(vcf.fix[, c(1, 2, 4, 
                5, 6)], vcf.info[, c(1, 2)]))
            dat$majorAF <- sapply(dat$AF, minorAfToMajorAf)  # transforms e.g. AF of 0.1 to 0.9, 0.9 stays 0.9 and 0.5 stays 0.5
            dat$genome <- contig_mapping[match(dat$CHROM, contig_mapping$contig), 
                ]$genome  # map chr information to genome name e.g. NHMU01000001.1 -> i48
            dat$genome_hr <- translateGenomeIdToFullName(tolower(dat$genome))
            dat$mouse.id <- substr(tools::file_path_sans_ext(basename(file)), 
                1, 4)
            dat$mouse.group <- translateMouseIdToTreatmentGroup(dat$mouse.id)
            dat$day <- as.integer(substr(basename(file), 6, 
                7))
            dat$phase <- binDaysByPhase(as.numeric(as.matrix(dat$day)))
            dat$phase_num <- binDaysByPhaseGroup(dat$day)
            dat$dp <- as.numeric(as.matrix(vcf.info$DP))
            # annotate overlay of gene
            dt.gff <- data.table(start = gff.df$start, end = gff.df$end, 
                chr = as.character(as.matrix(gff.df$chr)), 
                feature = gff.df$product)
            colnames(dat)[1:2] <- c("chr", "start")
            dat$start <- as.integer(as.matrix(dat$start))
            dat$chr <- as.character(as.matrix(dat$chr))
            dat$end <- dat$start
            dat2 <- as.data.table(dat)
            setkey(dt.gff, chr, start, end)
            annotated <- foverlaps(dat2, dt.gff, type = "within", 
                mult = "first")
            res[[tools::file_path_sans_ext(basename(file))]] <- annotated  # add vcf df to list
        } else {
            message(paste("Skipping", file))
        }
    }
    df <- as.data.frame(do.call(rbind, res))  # merge list to df
    return(df)
}
## data/references/joined_reference_curated_ecoli/joined_reference_curated_ecoli.gff
## Skipping out_philipp/all_vcf/1683d09_S49.vcf

4.4 number of variants per samples

4.4.1 number of variants per treatment group

Figure 4.3: number of variants of all 12 OMM genomes by mouse

4.5 Heatmap

All mice

## Warning in dcast(dat, variant.id ~ sample.id, value.var
## = "AF"): The dcast generic in data.table has been
## passed a data.frame and will attempt to redirect to the
## reshape2::dcast; please note that reshape2 is deprecated,
## and this redirection is now deprecated as well. Please do
## this redirection yourself like reshape2::dcast(dat). In
## the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of
# samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10), ]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 
    0.5)), ]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + 
    phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-", annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, 
    NA)
ord = data.frame(day = heat3.day, mouse.id = heat3.mouse.id)

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id), ]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id), 
    ]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-", qpcr$day)
qpcr <- qpcr[, -c(1:5)]

qpcr <- apply(qpcr, 1, function(x) x/sum(x))

qpcr <- qpcr[, which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[, match(colnames(heat3), colnames(qpcr))]

# pdf('heat.pdf', width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE, top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3), 
    smooth = TRUE, border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1, gp = gpar(fill = 1:12), height = unit(3, 
        "cm")), mouse = heat3.mouse.id, group = heat3.mouse.group, 
    phase = heat3.phase, day = anno_simple(heat3.day, pch = heat3.phase2)), 
    cluster_columns = F, column_order = order(ord$occ, ord$mouse.id, 
        ord$day), right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))), 
    row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), column_split = heat3.mouse.group, 
    column_names_gp = gpar(fontsize = 5), row_names_gp = gpar(fontsize = 3), 
    show_row_dend = F, show_row_names = F, show_column_dend = F)

All mice clustered

## Warning in dcast(dat, variant.id ~ sample.id, value.var
## = "AF"): The dcast generic in data.table has been
## passed a data.frame and will attempt to redirect to the
## reshape2::dcast; please note that reshape2 is deprecated,
## and this redirection is now deprecated as well. Please do
## this redirection yourself like reshape2::dcast(dat). In
## the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of
# samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10), ]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 
    0.5)), ]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + 
    phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-", annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, 
    NA)
ord = data.frame(day = heat3.day, mouse.id = heat3.mouse.id)

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id), ]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id), 
    ]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-", qpcr$day)
qpcr <- qpcr[, -c(1:5)]

qpcr <- apply(qpcr, 1, function(x) x/sum(x))

qpcr <- qpcr[, which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[, match(colnames(heat3), colnames(qpcr))]

# pdf('heat.pdf', width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE, top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3), 
    smooth = TRUE, border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1, gp = gpar(fill = 1:12), height = unit(3, 
        "cm")), mouse = heat3.mouse.id, group = heat3.mouse.group, 
    phase = heat3.phase, day = anno_simple(heat3.day, pch = heat3.phase2)), 
    cluster_columns = T, right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))), 
    row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), column_names_gp = gpar(fontsize = 5), 
    row_names_gp = gpar(fontsize = 3), show_row_dend = F, show_row_names = F, 
    show_column_dend = T)

## Warning in dcast(dat, variant.id ~ sample.id, value.var
## = "AF"): The dcast generic in data.table has been
## passed a data.frame and will attempt to redirect to the
## reshape2::dcast; please note that reshape2 is deprecated,
## and this redirection is now deprecated as well. Please do
## this redirection yourself like reshape2::dcast(dat). In
## the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of
# samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10), ]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 
    0.5)), ]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + 
    phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-", annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, 
    NA)
ord = data.frame(day = heat3.day, mouse.id = heat3.mouse.id)

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id), ]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id), 
    ]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-", qpcr$day)
qpcr <- qpcr[, -c(1:5)]

qpcr <- t(qpcr)

qpcr <- qpcr[, which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[, match(colnames(heat3), colnames(qpcr))]

# pdf('heat.pdf', width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE, top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3), 
    smooth = TRUE, border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1, gp = gpar(fill = 1:12), height = unit(3, 
        "cm")), mouse = heat3.mouse.id, group = heat3.mouse.group, 
    phase = heat3.phase, day = anno_simple(heat3.day, pch = heat3.phase2)), 
    cluster_columns = T, right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))), 
    row_gap = unit(0, "mm"), column_gap = unit(0, "mm"), column_names_gp = gpar(fontsize = 5), 
    row_names_gp = gpar(fontsize = 3), show_row_dend = F, show_row_names = F, 
    show_column_dend = T)

4.6 Focus on mouse where we have many time points

## Warning in dcast(dat, variant.id ~ sample.id, value.var
## = "AF"): The dcast generic in data.table has been
## passed a data.frame and will attempt to redirect to the
## reshape2::dcast; please note that reshape2 is deprecated,
## and this redirection is now deprecated as well. Please do
## this redirection yourself like reshape2::dcast(dat). In
## the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <- data.wide$variant.id
data.wide$variant.id <- NULL

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of
# samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10), ]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num > quantile(heat_var_num, 
    0.5)), ]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + 
    phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-", annot.data$day)

heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$day

ord = data.frame(day = heat3.day, mouse.id = heat3.mouse.id)

heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id), 
    ]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, 
    NA)

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

# order the heatmap by treatment group

Heatmap(heat3, name = "AF", col = col_fun, border = TRUE, top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3), 
    smooth = TRUE, border = TRUE), day = anno_simple(heat3.day, 
    pch = heat3.phase2)), cluster_columns = F, column_order = order(ord$mouse.id, 
    ord$day), column_split = heat3.mouse.group, column_names_gp = gpar(fontsize = 5), 
    row_names_gp = gpar(fontsize = 8), show_row_dend = F, show_row_names = F, 
    show_column_dend = F)

4.7 Akkermansia Muciniphila

4.7.1 area plot 1

## Warning in dcast(dat, day + mouse.id + mouse.group ~
## variant.id, value.var = "AF"): The dcast generic in
## data.table has been passed a data.frame and will attempt
## to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now
## deprecated as well. Please do this redirection yourself
## like reshape2::dcast(dat). In the next version, this
## warning will become an error.
## Warning in melt(data.wide.reduced, id.vars = c("day",
## "mouse.id", "mouse.group")): The melt generic in
## data.table has been passed a data.frame and will attempt
## to redirect to the relevant reshape2 method; please
## note that reshape2 is deprecated, and this redirection
## is now deprecated as well. To continue using melt
## methods from reshape2 while both libraries are attached,
## e.g. melt.list, you can prepend the namespace like
## reshape2::melt(data.wide.reduced). In the next version,
## this warning will become an error.

4.7.2 line plot

## Warning in dcast(dat, day + mouse.id + mouse.group ~
## variant.id, value.var = "AF"): The dcast generic in
## data.table has been passed a data.frame and will attempt
## to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now
## deprecated as well. Please do this redirection yourself
## like reshape2::dcast(dat). In the next version, this
## warning will become an error.
## Warning in melt(data.wide, id.vars = c("day", "mouse.id",
## "mouse.group")): The melt generic in data.table has been
## passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is
## deprecated, and this redirection is now deprecated as
## well. To continue using melt methods from reshape2 while
## both libraries are attached, e.g. melt.list, you can
## prepend the namespace like reshape2::melt(data.wide). In
## the next version, this warning will become an error.